{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d8a745a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "my_func =lambda x: 2*x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "63ab20e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_func(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "4ffb47c4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 2, 4, 6, 8]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[(lambda x:2*x)(i) for i in range(5)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e7922c7d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 2, 4, 6, 8]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list (map(lambda x: 2*x,range(5)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "32c4bfe4",
   "metadata": {},
   "outputs": [],
   "source": [
    "name = ['小明','小红','小黄']\n",
    "age = [18,19,17]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "89dcc57e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['小明_18', '小红_19', '小黄_17']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(map(lambda x,y:x+'_'+str(y),name,age))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fe85a920",
   "metadata": {},
   "source": [
    "##  zip对象与enumerate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d2b3e151",
   "metadata": {},
   "outputs": [],
   "source": [
    " l1,l2,l3=list('abc'),list('def'),list('hij')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "de1f5a9b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('a', 'd', 'h'), ('b', 'e', 'i'), ('c', 'f', 'j')]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(zip(l1,l2,l3))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "232ac1ba",
   "metadata": {},
   "source": [
    "##  文件读取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ce27f6d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "0321a510",
   "metadata": {},
   "outputs": [],
   "source": [
    "file_name = 'xxx.xlsx'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "d10ae07e",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th>col2</th>\n",
       "      <th>col3</th>\n",
       "      <th>col4</th>\n",
       "      <th>col5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>1.4</td>\n",
       "      <td>apple</td>\n",
       "      <td>2020/1/1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>b</td>\n",
       "      <td>3.4</td>\n",
       "      <td>banana</td>\n",
       "      <td>2020/1/2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6</td>\n",
       "      <td>c</td>\n",
       "      <td>2.5</td>\n",
       "      <td>orange</td>\n",
       "      <td>2020/1/5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>d</td>\n",
       "      <td>3.2</td>\n",
       "      <td>lemon</td>\n",
       "      <td>2020/1/7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col1 col2  col3    col4      col5\n",
       "0     2    a   1.4   apple  2020/1/1\n",
       "1     3    b   3.4  banana  2020/1/2\n",
       "2     6    c   2.5  orange  2020/1/5\n",
       "3     5    d   3.2   lemon  2020/1/7"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('E:\\浏览器下载\\data_analysis-master\\data_analysis-master\\week02\\data/my_csv.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "64d3b4e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th>col2</th>\n",
       "      <th>col3</th>\n",
       "      <th>col4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>1.4</td>\n",
       "      <td>apple 2020/1/1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>b</td>\n",
       "      <td>3.4</td>\n",
       "      <td>banana 2020/1/2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6</td>\n",
       "      <td>c</td>\n",
       "      <td>2.5</td>\n",
       "      <td>orange 2020/1/5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>d</td>\n",
       "      <td>3.2</td>\n",
       "      <td>lemon 2020/1/7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col1 col2  col3             col4\n",
       "0     2    a   1.4   apple 2020/1/1\n",
       "1     3    b   3.4  banana 2020/1/2\n",
       "2     6    c   2.5  orange 2020/1/5\n",
       "3     5    d   3.2   lemon 2020/1/7"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_table(\"E:\\浏览器下载\\data_analysis-master\\data_analysis-master\\week02\\data\\my_txt_saved.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "02da4b8f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "ImportError",
     "evalue": "Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\compat\\_optional.py:141\u001b[0m, in \u001b[0;36mimport_optional_dependency\u001b[1;34m(name, extra, errors, min_version)\u001b[0m\n\u001b[0;32m    140\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 141\u001b[0m     module \u001b[38;5;241m=\u001b[39m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    142\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n",
      "File \u001b[1;32mE:\\python\\lib\\importlib\\__init__.py:126\u001b[0m, in \u001b[0;36mimport_module\u001b[1;34m(name, package)\u001b[0m\n\u001b[0;32m    125\u001b[0m         level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:1050\u001b[0m, in \u001b[0;36m_gcd_import\u001b[1;34m(name, package, level)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:1027\u001b[0m, in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:1004\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[47], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_excel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mE:\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43m浏览器下载\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mdata_analysis-master\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mdata_analysis-master\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mweek02\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mdata\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mmy_excel.xlsx\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    209\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    210\u001b[0m         kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\util\\_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m    326\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m    327\u001b[0m         msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m    328\u001b[0m         \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m    329\u001b[0m         stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m    330\u001b[0m     )\n\u001b[1;32m--> 331\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\io\\excel\\_base.py:482\u001b[0m, in \u001b[0;36mread_excel\u001b[1;34m(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)\u001b[0m\n\u001b[0;32m    480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(io, ExcelFile):\n\u001b[0;32m    481\u001b[0m     should_close \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m--> 482\u001b[0m     io \u001b[38;5;241m=\u001b[39m \u001b[43mExcelFile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mio\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    483\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;129;01mand\u001b[39;00m engine \u001b[38;5;241m!=\u001b[39m io\u001b[38;5;241m.\u001b[39mengine:\n\u001b[0;32m    484\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    485\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEngine should not be specified when passing \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    486\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124man ExcelFile - ExcelFile already has the engine set\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    487\u001b[0m     )\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\io\\excel\\_base.py:1695\u001b[0m, in \u001b[0;36mExcelFile.__init__\u001b[1;34m(self, path_or_buffer, engine, storage_options)\u001b[0m\n\u001b[0;32m   1692\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine \u001b[38;5;241m=\u001b[39m engine\n\u001b[0;32m   1693\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstorage_options \u001b[38;5;241m=\u001b[39m storage_options\n\u001b[1;32m-> 1695\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engines\u001b[49m\u001b[43m[\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_io\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:556\u001b[0m, in \u001b[0;36mOpenpyxlReader.__init__\u001b[1;34m(self, filepath_or_buffer, storage_options)\u001b[0m\n\u001b[0;32m    541\u001b[0m \u001b[38;5;129m@doc\u001b[39m(storage_options\u001b[38;5;241m=\u001b[39m_shared_docs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstorage_options\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[0;32m    542\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m    543\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m    544\u001b[0m     filepath_or_buffer: FilePath \u001b[38;5;241m|\u001b[39m ReadBuffer[\u001b[38;5;28mbytes\u001b[39m],\n\u001b[0;32m    545\u001b[0m     storage_options: StorageOptions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m    546\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    547\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    548\u001b[0m \u001b[38;5;124;03m    Reader using openpyxl engine.\u001b[39;00m\n\u001b[0;32m    549\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    554\u001b[0m \u001b[38;5;124;03m    {storage_options}\u001b[39;00m\n\u001b[0;32m    555\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 556\u001b[0m     \u001b[43mimport_optional_dependency\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mopenpyxl\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m    557\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(filepath_or_buffer, storage_options\u001b[38;5;241m=\u001b[39mstorage_options)\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\compat\\_optional.py:144\u001b[0m, in \u001b[0;36mimport_optional_dependency\u001b[1;34m(name, extra, errors, min_version)\u001b[0m\n\u001b[0;32m    142\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m    143\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m--> 144\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(msg)\n\u001b[0;32m    145\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    146\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "\u001b[1;31mImportError\u001b[0m: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl."
     ]
    }
   ],
   "source": [
    "pd.read_excel(\"E:\\浏览器下载\\data_analysis-master\\data_analysis-master\\week02\\data\\my_excel.xlsx\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6e4a76a",
   "metadata": {},
   "source": [
    "##  重要的文件读取方法(特殊格式文件)\n",
    "sep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "d1fdaa99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1 , col2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TS , This is an apple.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GQ , My name is Bob.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>WT , Well done!</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>PT , May I help you?</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              col1 , col2\n",
       "0  TS , This is an apple.\n",
       "1    GQ , My name is Bob.\n",
       "2         WT , Well done!\n",
       "3    PT , May I help you?"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_table(\"E:\\浏览器下载\\data_analysis-master\\data_analysis-master\\week02\\data\\my_table_special_sep.txt\",sep='\\|\\|\\|',engine='python')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "47ffb4b5",
   "metadata": {},
   "source": [
    "## 文件写入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "b5fee3cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "huren = pd.read_html('https://hurun.net/zh-CN/Info/Detail?num=L9SQPH9FKJB1')[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "90f48a01",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'huren' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[26], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mhuren\u001b[49m\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhuren.xlsx\u001b[39m\u001b[38;5;124m'\u001b[39m,index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m)\n",
      "\u001b[1;31mNameError\u001b[0m: name 'huren' is not defined"
     ]
    }
   ],
   "source": [
    "huren.to_excel('huren.xlsx',index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "c3c49e1c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>排名</td>\n",
       "      <td>排名变化</td>\n",
       "      <td>企业</td>\n",
       "      <td>价值（亿元人民币）</td>\n",
       "      <td>价值变化（亿元人民币）</td>\n",
       "      <td>总部</td>\n",
       "      <td>行业</td>\n",
       "      <td>成立年份</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>抖音</td>\n",
       "      <td>13400</td>\n",
       "      <td>-10050</td>\n",
       "      <td>北京</td>\n",
       "      <td>社交媒体</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>SpaceX</td>\n",
       "      <td>8400</td>\n",
       "      <td>1680</td>\n",
       "      <td>洛杉矶</td>\n",
       "      <td>航天</td>\n",
       "      <td>2002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>-1</td>\n",
       "      <td>蚂蚁集团</td>\n",
       "      <td>8000</td>\n",
       "      <td>-2010</td>\n",
       "      <td>杭州</td>\n",
       "      <td>金融科技</td>\n",
       "      <td>2014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>Stripe</td>\n",
       "      <td>4100</td>\n",
       "      <td>-2230</td>\n",
       "      <td>旧金山</td>\n",
       "      <td>金融科技</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>11</td>\n",
       "      <td>Shein</td>\n",
       "      <td>4000</td>\n",
       "      <td>2680</td>\n",
       "      <td>广州</td>\n",
       "      <td>电子商务</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>币安</td>\n",
       "      <td>3000</td>\n",
       "      <td>2010</td>\n",
       "      <td>马耳他</td>\n",
       "      <td>区块链</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>Databricks</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>旧金山</td>\n",
       "      <td>大数据</td>\n",
       "      <td>2013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>微众银行</td>\n",
       "      <td>2200</td>\n",
       "      <td>200</td>\n",
       "      <td>深圳</td>\n",
       "      <td>金融科技</td>\n",
       "      <td>2014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>京东科技</td>\n",
       "      <td>2000</td>\n",
       "      <td>0</td>\n",
       "      <td>北京</td>\n",
       "      <td>数字科技</td>\n",
       "      <td>2013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>Checkout.com</td>\n",
       "      <td>1900</td>\n",
       "      <td>870</td>\n",
       "      <td>伦敦</td>\n",
       "      <td>金融科技</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0     1             2          3            4    5     6     7\n",
       "0   排名  排名变化            企业  价值（亿元人民币）  价值变化（亿元人民币）   总部    行业  成立年份\n",
       "1    1     0            抖音      13400       -10050   北京  社交媒体  2012\n",
       "2    2     1        SpaceX       8400         1680  洛杉矶    航天  2002\n",
       "3    3    -1          蚂蚁集团       8000        -2010   杭州  金融科技  2014\n",
       "4    4     0        Stripe       4100        -2230  旧金山  金融科技  2010\n",
       "5    5    11         Shein       4000         2680   广州  电子商务  2012\n",
       "6    6    15            币安       3000         2010  马耳他   区块链  2017\n",
       "7    7     1    Databricks       2500            0  旧金山   大数据  2013\n",
       "8    8     3          微众银行       2200          200   深圳  金融科技  2014\n",
       "9    9     2          京东科技       2000            0   北京  数字科技  2013\n",
       "10  10    11  Checkout.com       1900          870   伦敦  金融科技  2012"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_hurun = pd.read_html('https://hurun.net/zh-CN/Info/Detail?num=L9SQPH9FKJB1')[0]\n",
    "df_hurun"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "83ae5d3a",
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'openpyxl'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[28], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mdf_hurun\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_excel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43moutput_hurun.xlsx\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    209\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    210\u001b[0m         kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    209\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    210\u001b[0m         kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\core\\generic.py:2374\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[0;32m   2361\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexcel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m   2363\u001b[0m formatter \u001b[38;5;241m=\u001b[39m ExcelFormatter(\n\u001b[0;32m   2364\u001b[0m     df,\n\u001b[0;32m   2365\u001b[0m     na_rep\u001b[38;5;241m=\u001b[39mna_rep,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2372\u001b[0m     inf_rep\u001b[38;5;241m=\u001b[39minf_rep,\n\u001b[0;32m   2373\u001b[0m )\n\u001b[1;32m-> 2374\u001b[0m \u001b[43mformatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   2375\u001b[0m \u001b[43m    \u001b[49m\u001b[43mexcel_writer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2376\u001b[0m \u001b[43m    \u001b[49m\u001b[43msheet_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msheet_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2377\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstartrow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstartrow\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2378\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstartcol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstartcol\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2379\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfreeze_panes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreeze_panes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2380\u001b[0m \u001b[43m    \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2381\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2382\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\io\\formats\\excel.py:944\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[0;32m    940\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m    941\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    942\u001b[0m     \u001b[38;5;66;03m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[0;32m    943\u001b[0m     \u001b[38;5;66;03m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[1;32m--> 944\u001b[0m     writer \u001b[38;5;241m=\u001b[39m \u001b[43mExcelWriter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# type: ignore[abstract]\u001b[39;49;00m\n\u001b[0;32m    945\u001b[0m \u001b[43m        \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\n\u001b[0;32m    946\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    947\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m    949\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
      "File \u001b[1;32mE:\\python\\lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:56\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m     43\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m     44\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m     45\u001b[0m     path: FilePath \u001b[38;5;241m|\u001b[39m WriteExcelBuffer \u001b[38;5;241m|\u001b[39m ExcelWriter,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     54\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m     55\u001b[0m     \u001b[38;5;66;03m# Use the openpyxl module as the Excel writer.\u001b[39;00m\n\u001b[1;32m---> 56\u001b[0m     \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenpyxl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mworkbook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Workbook\n\u001b[0;32m     58\u001b[0m     engine_kwargs \u001b[38;5;241m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[0;32m     60\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m     61\u001b[0m         path,\n\u001b[0;32m     62\u001b[0m         mode\u001b[38;5;241m=\u001b[39mmode,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     65\u001b[0m         engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m     66\u001b[0m     )\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'"
     ]
    }
   ],
   "source": [
    "df_hurun.to_excel('output_hurun.xlsx',index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "080ef918",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
